library(tidyverse)library(patchwork)library(maps) # Original maps package (mostly outdated, but can still be used for simple maps)library(usmap) # US map that includes Alaska and Hawaii library(mapdata) # Extra map databases (supplment to maps package; world map is out-dated)library(ggmap) # Allows use of Stamen and Google maps with ggplot2library(sf) # Simple Features for R (modern way to work with spatial data)
Create a data frame from the maps package databases
Code
us_states <-map_data("state")us_states
Using Data from the maps Package in ggplot2 (2/3)
Create a map of the US states using ggplot2
Code
ggplot(us_states, aes(x = long, y = lat, group = group)) +# make sure to include "group" in the aesthetics geom_polygon(fill ="white", color ="black") +# borders on the map are just a collection of polygons; each polygon is a grouptheme_void()
The scaling is off as ggplot does not know how to scale the map.
Using Data from the maps Package in ggplot2 (3/3)
Two solutions for scaling the map
1. Use the coord_fixed() function
Code
ggplot(us_states, aes(x = long, y = lat, group = group)) +geom_polygon(fill ="white", color ="black") +theme_void() +coord_fixed(1.3)
2. Use the coord_map() function
Code
ggplot(us_states, aes(x = long, y = lat, group = group)) +geom_polygon(fill ="white", color ="black") +theme_void() +coord_map()
Add Cities to the Map (1/4)
Create a data frame of cities in the US usign the us.cities data set
Code
us_cities <-as_tibble(us.cities)us_cities
Add Cities to the Map (2/4)
Create a map of the US states with cities using ggplot2
Code
us_states |>ggplot(aes(x = long, y = lat)) +geom_polygon(aes(group = group), fill ="white", color ="black") +geom_point(data = us_cities, aes(x = long, y = lat), color ="blue", size =1) +theme_void() +coord_map()
Add Cities to the Map (3/4)
What’s a quick way to remove Alaska and Hawaii cities from the map?
Code
cities_lower48 <- us_cities |>filter(!(country.etc %in%c("AK", "HI")))us_states |>ggplot(aes(x = long, y = lat)) +geom_polygon(aes(group = group), fill ="white", color ="black") +geom_point(data = cities_lower48, aes(x = long, y = lat), color ="blue", size =1) +theme_void() +coord_map()
Add Cities to the Map (4/4)
Add state capitals and the national capital as unique points
Code
state_capitals <- cities_lower48 |>filter(capital ==2)us_capital <- us_cities |>filter(name =="WASHINGTON DC")us_states |>ggplot(aes(x = long, y = lat)) +geom_polygon(aes(group = group), fill ="white", color ="black") +geom_point(data = cities_lower48, aes(x = long, y = lat), color ="blue", size =1, shape =1) +geom_point(data = state_capitals, aes(x = long, y = lat), color ="red", size =2) +geom_point(data = us_capital, aes(x = long, y = lat), fill ="green", size =3, shape =23) +theme_void() +coord_map()
Map projections
Code
us_states |>ggplot(aes(x = long, y = lat)) +geom_polygon(aes(group = group), fill ="white", color ="black") +geom_point(data = cities_lower48, aes(x = long, y = lat), color ="blue", shape =".") +coord_map("albers", lat0 =30, lat1 =45)
Maps that Include Alaska and Hawaii (1/2)
Code
usmap::plot_usmap(regions ="states")
Maps that Include Alaska and Hawaii (2/2)
the usmap::plot_usmap() function returns a ggplot object
The word “choropleth” has an interesting etymology rooted in Greek. It’s a compound word formed from two Greek elements:
“Choro-” (χώρο-): This prefix comes from the Greek word “choros” (χώρος), which means “area” or “region.”
“-pleth” (-πλήθ): This suffix is derived from the Greek word “plethos” (πλῆθος), meaning “multitude” or “quantity.”
When combined, these elements create “choropleth,” which essentially means “area quantity” or “region value.” This name aptly describes the nature of choropleth maps, which display quantities or values across different geographic areas.
The term “choropleth” was coined in 1938 by John Kirtland Wright, an American geographer. Wright introduced this term in his doctoral dissertation at Harvard University, titled “Notes on Statistical Mapping, With Special Reference to the Mapping of Population Phenomena.”
This relatively recent coinage explains why the term might seem unfamiliar to many people, despite the widespread use of choropleth maps in various fields today.
Inspect the us_map() Data Frame
Note, we do not have a data frame with latitude and longitude columns
Code
state_df <-us_map()state_df
Adding Cities to the usmap Map
A convenience method called usmap_transform can be used to add cities (or other points) to the map.
The word “choropleth” has an interesting etymology rooted in Greek. It’s a compound word formed from two Greek elements:
“Choro-” (χώρο-): This prefix comes from the Greek word “choros” (χώρος), which means “area” or “region.”
“-pleth” (-πλήθ): This suffix is derived from the Greek word “plethos” (πλῆθος), meaning “multitude” or “quantity.”
When combined, these elements create “choropleth,” which essentially means “area quantity” or “region value.” This name aptly describes the nature of choropleth maps, which display quantities or values across different geographic areas.
The term “choropleth” was coined in 1938 by John Kirtland Wright, an American geographer. Wright introduced this term in his doctoral dissertation at Harvard University, titled “Notes on Statistical Mapping, With Special Reference to the Mapping of Population Phenomena.”
This relatively recent coinage explains why the term might seem unfamiliar to many people, despite the widespread use of choropleth maps in various fields today.
Add a Choropleth Layer to the Map
State Population Data
Code
plot_usmap(data = statepop, values ="pop_2022", color ="white") +scale_fill_continuous(low ="black", high ="red",name ="Population (2022)",label = scales::comma,trans ="log10") +theme(legend.position ="right")
Shapefiles are a popular geospatial vector data format developed by Esri. They are used to store the geometric location and attribute information of geographic features. Shapefiles consist of several files with the same name but different extensions, such as .shp, .shx, .dbf, and .prj.
The .shp file contains the geometry data, such as points, lines, or polygons, that represent the geographic features.
The .shx file is an index file that helps speed up access to the geometry data.
The .dbf file stores the attribute data associated with the geographic features, such as names, population counts, or other information.
The .prj file contains the coordinate system information used to project the data onto a map.
# Read the shapefileut_shapefile <-st_read("shapefiles/tl_2024_49_tract/tl_2024_49_tract.shp")
Reading layer `tl_2024_49_tract' from data source
`/Users/ray/Documents/Documents - RAY’s Mac Studio/MST6600_Fall2024/shapefiles/tl_2024_49_tract/tl_2024_49_tract.shp'
using driver `ESRI Shapefile'
Simple feature collection with 716 features and 13 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: -114.0529 ymin: 36.99766 xmax: -109.0416 ymax: 42.0017
Geodetic CRS: NAD83
Code
# Filter the shapefile to Salt Lake County (FIPS code: 035)sl_county_sf <- ut_shapefile |>filter(COUNTYFP =="035")sl_county_sf
Using ggplot with Shapefiles
Code
sl_county_sf |>ggplot() +geom_sf() +labs(title ="Salt Lake County Shapefile Plot",subtitle ="Census Tracts ") +theme_void()
Adding Data to the Shapefile
Download median household income data from the 5-Year American Community Survey (ACS)
Code
# Load required librarieslibrary(tidycensus)# Set your Census API key# You need to sign up for an API key at http://api.census.gov/data/key_signup.html# census_api_key("YOUR_API_KEY_HERE", install = TRUE)# Define parametersyear <-2022# Use the most recent year availablestate <-"UT"county <-"Salt Lake"# Get median household income datasalt_lake_income <-get_acs(geography ="tract",variables ="B19013_001", # Median household income variablestate = state,county = county,year = year,survey ="acs5"# 5-year estimates)# View the datasalt_lake_income
Joining Two Data Frames: Example 1
Code
# Create two data framesdf1 <-tibble(id =c(1, 2, 3), value =c(10, 20, 30))df2 <-tibble(id =c(1, 2, 3), value =c(100, 200, 300))# Join the data framesdf_new <- df1 |>left_join(df2, by ="id")df1
Code
df2
Code
df_new
Joining Two Data Frames: Example 2
Code
# Create two data frames with different IDsdf1 <-tibble(id =c(1, 2, 3), value =c(10, 20, 30))df2 <-tibble(id =c(2, 3, 4), value =c(100, 200, 300))# Join the data framesdf_new <- df1 |>left_join(df2, by ="id")df1
Code
df2
Code
df_new
Joining Two Data Frames: Example 3
Code
# Create two data frames with different IDsdf1 <-tibble(id =c(1, 2, 3), value =c(10, 20, 30))df2 <-tibble(id =c(2, 3, 4), value =c(100, 200, 300))# Join the data framesdf_new <- df1 |>inner_join(df2, by ="id")df1
Code
df2
Code
df_new
Joining Two Data Frames: Example 4
Code
# Create two data frames with different IDsdf1 <-tibble(id =c(1, 2, 3), value =c(10, 20, 30))df2 <-tibble(id =c(2, 3, 4), value =c(100, 200, 300))# Join the data framesdf_new <- df1 |>full_join(df2, by ="id")df1
Code
df2
Code
df_new
Join the Data to the Shapefile
Code
sl_county_tracts_income <- sl_county_sf |>left_join(salt_lake_income, by ="GEOID")sl_county_tracts_income
Create a Choropleth Map
Code
sl_county_tracts_income |>ggplot() +geom_sf(aes(fill = estimate)) +scale_fill_viridis_c(name ="Median Household\nIncome",label = scales::dollar,trans ="log10",breaks =c(30000, 60000, 120000, 240000), ) +theme_minimal() +labs(title ="Salt Lake County American Community Survey Data",subtitle ="2018 \u2013 2022; Table B19013_001" ) +theme_void()